Random KFG Uniform Distribution:
    —  Pendant Pendant Sum Monte Carlo Simulation


1. Creating a Random Uniform Distribution

1.1 The Existing FieldGuide Distribution

The first dataframe to build is a database of sums using the khipus in the existing KFG.

Code
import math
import random
from random import choices

import numpy as np
import pandas as pd
import khipu_kamayuq as kamayuq  # A Khipu Maker is known (in Quechua) as a Khipu Kamayuq
import khipu_qollqa as kq
from pandas import Series, DataFrame

# Plotly
import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
plotly.offline.init_notebook_mode(connected = False)

from monte_carlo import DiscreteDistributionSampler, PendantSummer, StrawmanKhipu
Code
# Load every khipu in the KFG, then reduce each one to a StrawmanKhipu built from
# the knotted values of its pendant cords.
khipu_dict, all_khipus = kamayuq.fetch_khipus()

strawmen_kfg_khipu = [
    StrawmanKhipu(
        khipu.name(),
        "KFG",
        [cord.knotted_value() for cord in khipu.pendant_cords()],
    )
    for khipu in all_khipus
]

# One dataframe row per strawman khipu, using the columns StrawmanKhipu declares.
kfg_rows = [strawman.dataframe_tuple() for strawman in strawmen_kfg_khipu]
strawmen_kfg_df = pd.DataFrame(kfg_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_kfg_df.head()
name source num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
0 AS010 KFG 27 8 10.392305 3 2 5 3.0 1.000000 22.0 9.695360 0.227273 4.333333 0.577350 -6.5 4.949747
1 AS011 KFG 15 92 183.904867 0 0 0 0.0 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.000000
2 AS012 KFG 85 2 5.196152 3 4 7 6.0 2.449490 18.0 7.348469 0.233333 10.333333 6.027714 -9.0 5.099020
3 AS013 KFG 90 4 14.456832 0 5 5 14.0 14.456832 48.0 44.508426 0.121951 0.000000 0.000000 -19.0 8.860023
4 AS014 KFG 42 53 40.137264 1 2 3 2.0 0.000000 99.0 7.000000 0.071429 17.000000 0.000000 -15.0 4.242641
Code
# Tally the right- and left-handed sums across every KFG khipu.
# int() keeps the totals as plain Python ints so they format cleanly in the reports below.
total_right_sums = int(strawmen_kfg_df.num_right_sums.sum())
total_left_sums = int(strawmen_kfg_df.num_left_sums.sum())
print(f"{total_right_sums} right sums and {total_left_sums} left sums")
total_sums = total_right_sums + total_left_sums

# Share of each handedness as a whole-number percentage; 0 when there are no sums at all.
left_pct = round(100.0 * total_left_sums / total_sums) if total_sums > 0 else 0
right_pct = round(100.0 * total_right_sums / total_sums) if total_sums > 0 else 0

# Mean and standard deviation, taken across khipus, of each khipu's mean handedness.
left_handed_mean = round(strawmen_kfg_df.mean_left_handedness.mean(), 1)
right_handed_mean = round(strawmen_kfg_df.mean_right_handedness.mean(), 1)
left_handed_stdev = round(strawmen_kfg_df.mean_left_handedness.std(), 1)
right_handed_stdev = round(strawmen_kfg_df.mean_right_handedness.std(), 1)

# The counts are printed as plain "right/left" (no "name=" debug prefix on the left count).
print(f"Existing KFG - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums})")
print(f"             - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_kfg_df.describe()
4354 right sums and 3734 left sums
Existing KFG - Right/Left Distribution = 54%/46% (4354/total_left_sums=3734)
             - Right/Left Mean Handedness = 9.9/-8.5 ±(15.0/14.2)
num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
count 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000
mean 64.109231 309.835385 496.113591 6.698462 5.744615 12.443077 3.156923 2.032227 146.203077 70.992234 0.160933 9.914679 6.387789 -8.511022 5.777778
std 102.325678 1637.638528 2778.219238 13.630774 12.081363 25.319237 3.620689 3.925252 1052.163975 224.887696 0.179440 15.025712 12.766330 14.176137 11.853944
min 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -123.303371 0.000000
25% 14.000000 5.000000 7.071068 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -11.555556 0.000000
50% 32.000000 19.500000 26.267844 1.000000 1.000000 2.000000 2.000000 0.000000 25.000000 5.291503 0.105823 5.666667 0.000000 -3.333333 0.000000
75% 77.750000 78.750000 134.914450 6.000000 5.000000 11.750000 5.000000 2.828427 67.000000 41.225340 0.250000 13.925926 7.771955 0.000000 7.681053
max 1650.000000 26324.000000 42206.089632 105.000000 101.000000 200.000000 27.000000 37.094474 24072.000000 3210.316028 0.823009 150.525773 129.388714 0.000000 117.441998

1.2 Strawman Khipus based on a Random Uniform Distribution

Next, we make a dataframe based on randomly generated khipus that has:

  1. The same number of khipus as the KFG
  2. A pendant cord count chosen randomly from the existing khipus’ pendant cord counts
  3. Pendant values that are randomly generated from a uniform distribution.
Code
# Pendant-cord count of every KFG khipu; sample_kfg_num_cords() draws from this pool.
cords_per_khipu = [khipu.num_pendant_cords() for khipu in all_khipus]
# Draw a pendant-cord count for one strawman khipu from the KFG's own cord counts.
def sample_kfg_num_cords():
    """Return a pendant-cord count picked at random from ``cords_per_khipu``.

    Counts below 3 are raised to 3 so that trivial khipus are never produced.
    """
    (drawn,) = choices(cords_per_khipu, k=1)
    return max(drawn, 3)

# Number of random strawman khipus to produce: one per KFG khipu.
num_dummy_khipus = len(all_khipus)

# UNIFORM SAMPLE MATCH #
# The largest cord value in the KFG is 26324, but there are few cords above 2000, so the
# strawman cord values are drawn uniformly from the inclusive range below.
RUNIF_MIN_CORD_VALUE = 1
RUNIF_MAX_CORD_VALUE = 2000

# Sampler over the NON-ZERO pendant cord values in the khipu database.
# NOTE(review): the uniform strawmen below do not use it; an alternative draw,
# [round(x) for x in sampler.uniform_sample(n)], was previously tried in its place.
cord_values = []
for aKhipu in all_khipus:
    knotted_values = (aCord.knotted_value() for aCord in aKhipu.pendant_cords())
    cord_values.extend(value for value in knotted_values if value > 0)
sampler = DiscreteDistributionSampler(cord_values)

random_names = [f"runif_khipu_{i:05d}" for i in range(num_dummy_khipus)]
strawmen_runif_khipu = []
for random_name in random_names:
    # The cord count is drawn first, then that many uniformly distributed cord values.
    random_cords = [
        random.randint(RUNIF_MIN_CORD_VALUE, RUNIF_MAX_CORD_VALUE)
        for _ in range(sample_kfg_num_cords())
    ]
    strawmen_runif_khipu.append(StrawmanKhipu(random_name, "runif", random_cords))

# One dataframe row per random strawman khipu, using the columns StrawmanKhipu declares.
strawmen_runif_df = pd.DataFrame(
    [aStrawmanKhipu.dataframe_tuple() for aStrawmanKhipu in strawmen_runif_khipu],
    columns=StrawmanKhipu.dataframe_columns(),
)
strawmen_runif_df.head()
name source num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
0 runif_khipu_00000 runif 18 989.333333 412.331962 0 0 0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0
1 runif_khipu_00001 runif 12 945.666667 561.967539 0 0 0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0
2 runif_khipu_00002 runif 81 1032.975309 588.322764 1 0 1 2.0 0.0 1188.0 0.0 0.012346 46.0 0.0 0.0 0.0
3 runif_khipu_00003 runif 15 944.200000 558.606583 0 0 0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0
4 runif_khipu_00004 runif 24 998.416667 632.610390 0 0 0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0
Code
# Tally the right- and left-handed sums across every random-uniform strawman khipu.
# int() keeps the totals as plain Python ints so they format cleanly in the reports below.
total_right_sums = int(strawmen_runif_df.num_right_sums.sum())
total_left_sums = int(strawmen_runif_df.num_left_sums.sum())
total_sums = total_right_sums + total_left_sums
print(f"{total_right_sums=} {total_left_sums=}")

# Share of each handedness as a whole-number percentage; 0 when there are no sums at all.
left_pct = round(100.0 * total_left_sums / total_sums) if total_sums > 0 else 0
right_pct = round(100.0 * total_right_sums / total_sums) if total_sums > 0 else 0

# Mean and standard deviation, taken across khipus, of each khipu's mean handedness.
left_handed_mean = round(strawmen_runif_df.mean_left_handedness.mean(), 1)
right_handed_mean = round(strawmen_runif_df.mean_right_handedness.mean(), 1)
left_handed_stdev = round(strawmen_runif_df.mean_left_handedness.std(), 1)
right_handed_stdev = round(strawmen_runif_df.mean_right_handedness.std(), 1)

# The counts are printed as plain "right/left" (no "name=" debug prefix on the left count).
print(f"Random Uniform - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums})")
print(f"               - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_runif_df.describe()
total_right_sums=1940 total_left_sums=1858
Random Uniform - Right/Left Distribution = 51%/49% (1940/total_left_sums=1858)
               - Right/Left Mean Handedness = 16.6/-15.9 ±(39.8/38.9)
num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
count 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000
mean 68.235385 994.092289 569.039461 2.984615 2.858462 5.843077 0.946154 0.026115 609.397333 122.356884 0.021371 16.570869 8.879299 -15.938130 9.126179
std 131.938965 132.460267 83.906330 21.178886 20.374876 41.500664 1.126181 0.173590 726.788281 212.920709 0.037970 39.753211 29.803437 38.901977 29.663151
min 3.000000 424.666667 137.720732 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -418.686207 0.000000
25% 15.000000 935.419317 543.840313 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -15.375000 0.000000
50% 32.500000 998.589815 579.969655 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
75% 80.000000 1060.297297 604.767864 1.000000 1.000000 2.000000 2.000000 0.000000 1380.250000 223.711395 0.035609 19.375000 0.000000 0.000000 0.000000
max 1650.000000 1772.750000 899.973889 310.000000 303.000000 600.000000 5.000000 1.732051 1997.000000 1020.502001 0.363636 443.208754 352.748030 0.000000 350.818630

Note that the random uniform sums have a greater handedness overall, and a much greater standard deviation.

2. Random Uniform vs. Existing KFG - Graphical Distribution

To graphically compare the distributions of the random khipus with existing khipus, one dataframe is needed:

Code
def source_color(x):
    """Map a source label to a colour-scale value: 0.0 for "KFG", 1.0 for anything else."""
    if x == "KFG":
        return 0.0
    return 1.0

# Stack the KFG strawmen on top of the random-uniform strawmen, then tag every row with
# the numeric colour value derived from its source label.
combined_kfg_runif_df = pd.concat((strawmen_kfg_df, strawmen_runif_df), axis=0)
combined_kfg_runif_df['source_color'] = list(
    map(source_color, combined_kfg_runif_df.source.values)
)
Code
def _rounded_pct(part, whole):
    """Return ``part`` as a whole-number percentage of ``whole``; 0 when ``whole`` is not positive."""
    return round(100.0 * float(part) / float(whole)) if whole > 0 else 0


# Right/left sum totals for the existing KFG khipus.
kfg_left = int(strawmen_kfg_df.num_left_sums.sum())
kfg_right = int(strawmen_kfg_df.num_right_sums.sum())
# Guarded like the random-uniform percentages, so a dataset with no sums reports 0%
# instead of raising ZeroDivisionError.
pct_kfg_left = _rounded_pct(kfg_left, kfg_left + kfg_right)
pct_kfg_right = _rounded_pct(kfg_right, kfg_left + kfg_right)

# Right/left sum totals for the random-uniform strawman khipus.
runif_left = int(strawmen_runif_df.num_left_sums.sum())
runif_right = int(strawmen_runif_df.num_right_sums.sum())
pct_runif_left = _rounded_pct(runif_left, runif_left + runif_right)
pct_runif_right = _rounded_pct(runif_right, runif_left + runif_right)

print(f"Num Right/Left Sums for Existing KFG:{kfg_right}/{kfg_left} ({pct_kfg_right}%/{pct_kfg_left}%)")
print(f"Num Right/Left Sums for Random Uniform: {runif_right}/{runif_left} ({pct_runif_right}%/{pct_runif_left}%)")
Num Right/Left Sums for Existing KFG:4354/3734 (54%/46%)
Num Right/Left Sums for Random Uniform: 1940/1858 (51%/49%)
Code
# Scatter of right-handed vs. left-handed sum counts per khipu on log-log axes.
# Marker size is the mean number of summands; colour separates KFG from random khipus.
legend_text = "<b>Random Uniform vs KFG - #Sums:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_runif_df,
    x="num_right_sums",
    y="num_left_sums",
    log_x=True,
    log_y=True,
    size="mean_num_summands",
    opacity=.4,
    color='source_color',
    color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'],
    title=legend_text,
    width=944,
    height=944,
)
# Hide the legend and the colour bar; the title already explains the colour coding.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called as a statement to keep `fig` bound to the figure.
fig.show()

As expected, small random sums occur more often, and they also have fewer summands. Let’s compare the number of summands for the random khipus with that of the existing khipus.

Code
# Scatter of mean summands per sum (linear x axis) vs. number of sums (log y axis) per khipu.
# Marker size is the number of sums per non-zero pendant; colour separates KFG from random khipus.
legend_text = "<b>Random Uniform vs KFG - #Sums vs #Summands:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Sums/Pendant</i>"
fig = px.scatter(
    combined_kfg_runif_df,
    x="mean_num_summands",
    y="num_sums",
    log_y=True,
    size="num_sums_per_nonzero_pendant",
    opacity=.4,
    color='source_color',
    color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'],
    title=legend_text,
    width=944,
    height=944,
)
# Hide the legend and the colour bar; the title already explains the colour coding.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called as a statement to keep `fig` bound to the figure.
fig.show()

This echoes the previous statement about the number of summands being very different in the random uniform set! A relatively clear separation occurs.

Code
# Scatter of mean left-handedness vs. mean right-handedness per khipu (linear axes).
# Marker size is the mean number of summands; colour separates KFG from random khipus.
legend_text = "<b>Random Uniform vs KFG - Sum Handedness:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_runif_df,
    x="mean_left_handedness",
    y="mean_right_handedness",
    size="mean_num_summands",
    opacity=0.3,
    color='source_color',
    color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'],
    title=legend_text,
    width=944,
    height=944,
)
# Hide the legend and the colour bar; the title already explains the colour coding.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called as a statement to keep `fig` bound to the figure.
fig.show()

Now we’re getting somewhere. Existing KFG khipus have their sums close, for obvious reasons. However, the randomly generated khipus have many more distant sums, with a small number of summands.

Code
# Scatter of sums per non-zero pendant (linear x axis) vs. mean sum value (log y axis) per khipu.
# Marker size is the number of pendants; colour separates KFG from random khipus.
legend_text = "<b>Random Uniform vs KFG - Mean Sum vs #Sums/Pendant:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Pendant</i>"
fig = px.scatter(
    combined_kfg_runif_df,
    x="num_sums_per_nonzero_pendant",
    y="mean_sum_value",
    log_y=True,
    size="num_pendants",
    opacity=0.5,
    color='source_color',
    color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=["name", 'num_sums'],
    title=legend_text,
    width=944,
    height=944,
)
# Hide the legend and the colour bar; the title already explains the colour coding.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called as a statement to keep `fig` bound to the figure.
fig.show()

3. Frequency Distributions

This section examines the frequency distributions of key variables using violin plots, where the width shows frequency and the height shows the variable being measured.

Code
# Handedness bias per khipu: number of right-handed sums minus number of left-handed sums.
# Plain arrays are assigned because the concatenated frame's index repeats and must not be aligned on.
combined_kfg_runif_df['handedness_bias'] = (
    combined_kfg_runif_df['num_right_sums'].abs().to_numpy()
    - combined_kfg_runif_df['num_left_sums'].abs().to_numpy()
)
# Relabel the source for display: anything that is not "KFG" becomes "Random Uniform".
# The relabel is idempotent, so re-running the cell is harmless.
combined_kfg_runif_df['source'] = [
    "KFG" if source == 'KFG' else "Random Uniform"
    for source in combined_kfg_runif_df.source.values.tolist()
]
legend_text = "<b>Violin Plot - Random Uniform vs KFG Handedness: (#RightHandedSums - #LeftHandedSums)</b>"
fig = px.violin(
    combined_kfg_runif_df,
    y="handedness_bias",
    points='all',
    color="source",
    hover_data=['name', 'num_sums'],
    title=legend_text,
    width=944,
    height=944,
)
# show() returns None, so it is called as a statement to keep `fig` bound to the figure.
fig.show()
Code
# Natural log of each khipu's mean sum value; non-positive means (khipus without sums) map to 0.
# The x > 0 guard already makes the argument positive, so no abs() is needed.
combined_kfg_runif_df['log_sum_mean'] = [
    math.log(x) if x > 0 else 0
    for x in combined_kfg_runif_df['mean_sum_value'].values.tolist()
]
legend_text = "<b>Violin Plot - Random Uniform vs KFG -  Log(Sum Mean)</b>"
fig = px.violin(
    combined_kfg_runif_df,
    y="log_sum_mean",
    points='all',
    color="source",
    labels={"log_sum_mean": "log(Sum Mean)"},
    hover_data=['name', 'num_sums', 'mean_sum_value'],
    title=legend_text,
    width=944,
    height=944,
)
# show() returns None, so it is called as a statement to keep `fig` bound to the figure.
fig.show()
Code
# Keep handedness_bias as right-handed minus left-handed sums, the orientation used by the
# "(#RightHandedSums - #LeftHandedSums)" violin plot; this plot does not read the column, so
# recomputing it here must not flip its sign.
combined_kfg_runif_df['handedness_bias'] = (
    combined_kfg_runif_df['num_right_sums'].abs().to_numpy()
    - combined_kfg_runif_df['num_left_sums'].abs().to_numpy()
)
# Relabel the source for display: anything that is not "KFG" becomes "Random Uniform" (idempotent).
combined_kfg_runif_df['source'] = [
    "KFG" if source == 'KFG' else "Random Uniform"
    for source in combined_kfg_runif_df.source.values.tolist()
]
legend_text = "<b>Violin Plot - Random Uniform vs KFG - #Sums per Pendant</b>"
fig = px.violin(
    combined_kfg_runif_df,
    y="num_sums_per_nonzero_pendant",
    points='all',
    color="source",
    labels={"num_sums_per_nonzero_pendant": "#Sums per Pendant"},
    hover_data=['name', 'num_sums', 'mean_sum_value'],
    title=legend_text,
    width=944,
    height=944,
)
# show() returns None, so it is called as a statement to keep `fig` bound to the figure.
fig.show()
Code
# Keep handedness_bias as right-handed minus left-handed sums, the orientation used by the
# "(#RightHandedSums - #LeftHandedSums)" violin plot; this plot does not read the column, so
# recomputing it here must not flip its sign.
combined_kfg_runif_df['handedness_bias'] = (
    combined_kfg_runif_df['num_right_sums'].abs().to_numpy()
    - combined_kfg_runif_df['num_left_sums'].abs().to_numpy()
)
# Relabel the source for display: anything that is not "KFG" becomes "Random Uniform" (idempotent).
combined_kfg_runif_df['source'] = [
    "KFG" if source == 'KFG' else "Random Uniform"
    for source in combined_kfg_runif_df.source.values.tolist()
]
legend_text = "<b>Violin Plot - Random Uniform vs KFG - #Summands per Sum</b>"
fig = px.violin(
    combined_kfg_runif_df,
    y="mean_num_summands",
    points='all',
    color="source",
    labels={"mean_num_summands": "#Summands per Sum"},
    hover_data=['name', 'num_sums', 'mean_sum_value'],
    title=legend_text,
    width=944,
    height=944,
)
# show() returns None, so it is called as a statement to keep `fig` bound to the figure.
fig.show()

This is also as you would expect - randomly generated khipus have large sums, few summands per sum, and few sums per pendant cord.